This notebook contains the key visualizations for the Task Mapping
paper.
library(factoextra)
library(NbClust)
library(cluster)
library(plotly)
library(ggplot2)
library(caret) #for knn
library(e1071) #for svm
library(dplyr)
library(tidyverse)
The decision-boundary plotting code below comes from: https://mhahsler.github.io/Introduction_to_Data_Mining_R_Examples/book/classification-alternative-techniques.html#k-nearest-neighbors
# Plot the decision regions of a fitted classifier over two predictors.
#
# Arguments:
#   model        Fitted classifier that has a predict() method.
#   data         Data frame with the class column and the predictors. The
#                first two columns that remain after dropping `class_var`
#                are used as the plot axes.
#   class_var    Name (a single string) of the factor column in `data` that
#                holds the true class.
#   predict_type `type` values handed to predict(): the first is used to get
#                class labels, the second to get class probabilities.
#   resolution   Number of grid points per axis (75 dpi if the image is
#                rendered 5 inches wide).
#
# Returns a ggplot: predicted class as a raster (alpha = margin between the
# two most probable classes, or 1 when no probabilities are available),
# class-boundary contours, the points in `data`, and the accuracy of the
# model on `data` in the subtitle.
decisionplot <- function(model, data, class_var,
  predict_type = c("class", "prob"), resolution = 5 * 75) {
  stopifnot(
    is.character(class_var), length(class_var) == 1,
    class_var %in% colnames(data)
  )

  y <- data[[class_var]]
  x <- data %>% dplyr::select(-all_of(class_var))
  # The grid, the raster and the point overlay must all use the same two
  # columns; taking them from `data` instead of `x` would pick the class
  # column whenever it is one of the first two columns of `data`.
  axis_vars <- colnames(x)[1:2]

  # resubstitution accuracy (computed on `data` exactly as passed in)
  prediction <- predict(model, x, type = predict_type[1])
  # LDA returns a list
  if (is.list(prediction)) prediction <- prediction$class
  prediction <- factor(prediction, levels = levels(y))

  cm <- confusionMatrix(data = prediction, reference = y)
  acc <- cm$overall["Accuracy"]

  # evaluate model on a regular grid spanning the range of both axes
  r <- vapply(x[axis_vars], range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)
  g <- cbind(rep(xs, each = resolution), rep(ys, times = resolution))
  colnames(g) <- colnames(r)
  g <- as_tibble(g)

  ### guess how to get class labels from predict
  ### (unfortunately not very consistent between models)
  cl <- predict(model, g, type = predict_type[1])

  if (is.list(cl)) {
    # LDA returns a list
    prob <- cl$posterior
    cl <- cl$class
  } else {
    # Not every predict() method supports a probability type; treat a
    # failure as "no probabilities" instead of leaving `prob` undefined.
    prob <- tryCatch(
      predict(model, g, type = predict_type[2]),
      error = function(e) NULL
    )
  }

  # We visualize the difference in probability/score between the winning
  # class and the second best class. Anything that is not a matrix-like
  # object with at least two columns falls back to full opacity (1).
  max_prob <- tryCatch({
    stopifnot(length(dim(prob)) == 2, ncol(prob) >= 2)
    ranked <- t(apply(prob, MARGIN = 1, sort, decreasing = TRUE))
    ranked[, 1] - ranked[, 2]
  }, error = function(e) 1)

  cl <- factor(cl, levels = levels(y))

  g <- g %>% add_column(prediction = cl, probability = max_prob)

  # `size` is kept on geom_contour() for older ggplot2 releases; newer
  # releases prefer `linewidth` for line widths.
  ggplot(g, mapping = aes(
    x = .data[[axis_vars[1]]],
    y = .data[[axis_vars[2]]])) +
    geom_raster(mapping = aes(fill = prediction, alpha = probability)) +
    geom_contour(mapping = aes(z = as.numeric(prediction)),
      bins = length(levels(cl)), size = .5, color = "black") +
    geom_point(data = data, mapping = aes(
      x = .data[[axis_vars[1]]],
      y = .data[[axis_vars[2]]],
      shape = .data[[class_var]]), alpha = .7) +
    scale_alpha_continuous(range = c(0, 1), limits = c(0, 1), guide = "none") +
    # Explicit titles so the axes/legend show the column names rather than
    # the `.data[[...]]` expressions.
    labs(x = axis_vars[1], y = axis_vars[2], shape = class_var,
      subtitle = paste("Training accuracy:", round(acc, 2)))
}
Load the Data
# Task-by-feature table; the `task` column is dropped before the PCA below.
task_map <- readr::read_csv(file = '../task_map.csv')
Rows: 102 Columns: 24-- Column specification ----------------------------------------------------------------------------------------
Delimiter: ","
chr (1): task
dbl (23): Q1concept_behav, Q3type_1_planning, Q4type_2_generate, Q6type_5_cc, Q7type_7_battle, Q8type_8_perf...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Plot the Task Map and other Related Images
Draw the task map using PCA & clustering
First, run the PCA
set.seed(1)

# PCA on every column except the task name. Columns are centred; prcomp()'s
# `scale.` argument is left at its default.
pca <- task_map %>% # select(-continuous_questions) %>%
  select(-task) %>%
  prcomp(center = TRUE)

# get optimal number of clusters -- "silhouette" method
fviz_nbclust(x = pca$x, FUNcluster = stats::kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette method")

# get optimal number of clusters -- NbClust's vote across its indices
NbClust(data = pca$x, distance = "euclidean",
        min.nc = 2, max.nc = 15, method = "kmeans")
*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.

*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 8 proposed 2 as the best number of clusters
* 4 proposed 3 as the best number of clusters
* 2 proposed 4 as the best number of clusters
* 1 proposed 5 as the best number of clusters
* 2 proposed 7 as the best number of clusters
* 1 proposed 8 as the best number of clusters
* 1 proposed 9 as the best number of clusters
* 1 proposed 10 as the best number of clusters
* 1 proposed 11 as the best number of clusters
* 2 proposed 14 as the best number of clusters
* 1 proposed 15 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 2
*******************************************************************
$All.index
KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman Rubin Cindex DB
2 5.0069 61.8549 15.6938 0.5233 220.2327 24147.8630 41.3638 86.4849 6.2479 1.6185 0.4571 1.2204
3 1.1136 43.1918 14.4091 0.1053 376.1613 11780.1299 28.8616 74.7532 8.5042 1.8726 0.4356 1.6546
4 1.8754 37.4068 8.5834 0.9450 564.3520 3309.4161 20.8908 65.2555 15.0669 2.1451 0.4371 1.5369
5 0.9756 32.3249 8.7802 1.0947 661.2027 2000.7902 17.3124 60.0004 17.5862 2.3330 0.4476 1.4305
6 1.1393 29.6479 7.9923 1.6847 764.8096 1043.3448 13.2934 55.0201 18.7310 2.5442 0.4157 1.4520
7 1.6179 27.8029 -2.8124 2.0547 879.0917 463.1614 10.7668 50.7915 20.6066 2.7560 0.3940 1.4904
8 0.2936 22.4846 16.2947 -0.9150 834.8846 933.1268 11.7208 52.3410 19.8582 2.6744 0.4245 1.6856
9 2.6595 24.8540 6.9196 2.2594 1059.3556 130.7662 7.7355 44.6083 26.6681 3.1380 0.3680 1.5430
10 2.3249 24.2416 3.4172 2.7838 1180.4019 49.2743 6.3340 41.5191 31.8821 3.3715 0.3517 1.5127
11 0.9576 22.7199 3.5122 2.1276 1304.8645 17.5983 6.3698 40.0322 36.8754 3.4967 0.3293 1.4787
12 0.6126 21.5316 5.4211 1.6851 1372.6428 10.7761 5.5888 38.5445 34.0191 3.6316 0.3909 1.5414
13 1.1321 21.1404 4.9644 1.8342 1453.7129 5.7122 5.1333 36.3547 36.6380 3.8504 0.3623 1.3717
14 1.5224 20.7488 3.4737 1.9774 1614.4163 1.3707 4.4859 34.4340 45.9769 4.0652 0.3659 1.3532
15 0.8365 20.0450 4.1119 1.5082 1695.3182 0.7119 4.2980 33.1264 46.8036 4.2256 0.3627 1.3665
Silhouette Duda Pseudot2 Beale Ratkowsky Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex
2 0.3416 0.7859 17.4368 4.3179 0.0630 43.2424 0.6395 1.5407 0.5524 0.2576 0.0177 2.6413 0.8811
3 0.2475 0.9032 4.1780 1.6751 0.0989 24.9177 0.5615 0.0573 1.2348 0.2463 0.0180 3.1665 0.8164
4 0.2392 3.4388 -24.1128 -9.9884 0.1191 16.3139 0.6027 0.5508 1.3885 0.3242 0.0229 3.0450 0.7672
5 0.2363 1.0237 -0.6933 -0.3582 0.1232 12.0001 0.5974 0.5295 1.5411 0.3370 0.0229 2.8657 0.7359
6 0.2284 0.7922 9.1811 4.1016 0.1316 9.1700 0.5751 1.1979 2.0372 0.2865 0.0241 3.0128 0.7079
7 0.1996 1.6517 -7.4970 -5.4438 0.1359 7.2559 0.5114 -0.7349 2.8564 0.2395 0.0255 3.5435 0.6770
8 0.1472 3.6616 -11.6303 -9.3601 0.1314 6.5426 0.4167 -0.0694 3.9377 0.2289 0.0227 3.9793 0.6884
9 0.2057 2.6105 -12.9556 -8.2752 0.1397 4.9565 0.4635 0.2250 3.8563 0.2228 0.0262 3.7203 0.6326
10 0.1991 2.9698 -13.9289 -8.5409 0.1412 4.1519 0.4546 0.2361 4.3147 0.2268 0.0272 3.6939 0.6094
11 0.1963 1.4043 -2.8789 -3.8615 0.1419 3.6393 0.4470 -1.6112 4.6482 0.1818 0.0277 3.6737 0.5981
12 0.1789 1.5077 -5.7246 -4.9686 0.1432 3.2120 0.4095 -0.2396 5.5124 0.2067 0.0281 3.8811 0.5862
13 0.2014 0.9828 0.2792 0.2633 0.1435 2.7965 0.4372 1.0361 5.0282 0.2030 0.0282 3.4167 0.5716
14 0.1982 0.7026 2.1168 5.4516 0.1457 2.4596 0.4140 0.2069 5.7059 0.2758 0.0287 3.7517 0.5534
15 0.2053 3.0324 -6.0321 -7.1920 0.1450 2.2084 0.4072 -0.0466 6.0546 0.2818 0.0291 3.5052 0.5439
SDbw
2 0.4333
3 0.3542
4 0.3121
5 0.2577
6 0.2645
7 0.2390
8 0.2303
9 0.2057
10 0.1909
11 0.1943
12 0.1785
13 0.1733
14 0.1523
15 0.1491
$All.CriticalValues
CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
2 0.8582 10.5718 0.0000
3 0.8178 8.6863 0.0248
4 0.6634 17.2534 1.0000
5 0.7972 7.6335 1.0000
6 0.8178 7.7954 0.0000
7 0.6446 10.4743 1.0000
8 0.5939 10.9390 1.0000
9 0.6220 12.7618 1.0000
10 0.5939 14.3574 1.0000
11 0.6220 6.0771 1.0000
12 0.7153 6.7676 1.0000
13 0.7475 5.4039 0.9998
14 0.5939 3.4184 0.0000
15 0.5089 8.6854 1.0000
$Best.nc
KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman Rubin Cindex DB
Number_clusters 2.0000 2.0000 8.0000 10.0000 9.000 4.000 3.0000 7.0000 14.0000 7.0000 11.0000 2.0000
Value_Index 5.0069 61.8549 19.1071 2.7838 224.471 7162.088 12.5022 5.7781 9.3389 -0.2934 0.3293 1.2204
Silhouette Duda PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain Dunn Hubert
Number_clusters 2.0000 3.0000 3.000 4.0000 14.0000 3.0000 2.0000 2.0000 2.0000 5.000 0
Value_Index 0.3416 0.9032 4.178 -9.9884 0.1457 18.3247 0.6395 1.5407 0.5524 0.337 0
SDindex Dindex SDbw
Number_clusters 2.0000 0 15.0000
Value_Index 2.6413 0 0.1491
$Best.partition
[1] 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2
[54] 2 1 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 1 1 1 2 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2

# NOTE(review): k = 3 is hard-coded, while the majority rule in the recorded
# NbClust output favours 2 clusters -- confirm this is intentional.
kmeans_output <- pca$x %>%
  kmeans(centers = 3, nstart = 100)

# Name the cluster column directly rather than renaming the deparsed
# `factor(kmeans_output$cluster)` header that cbind() would generate.
combined_data <- cbind(task_map, pca$x,
                       cluster = factor(kmeans_output$cluster))

fviz_eig(pca)

Standard Task Map Image with All Labels
# Standard task map: each task at its (PC1, PC2) position, with a text label
# whose fill encodes the k-means cluster.
p <- ggplot(
  data = combined_data,
  mapping = aes(x = PC1, y = PC2, label = task, fill = cluster)
) +
  geom_point() +
  geom_label(size = 4, nudge_y = 0.1) +
  # Alternative kept for reference: label only a hand-picked subset, e.g.
  #   geom_label(
  #     data = subset(combined_data,
  #                   task %in% c("NASA Moon survival", "Desert survival")),
  #     aes(x = PC1, y = PC2, label = task, fill = cluster),
  #     nudge_y = 0.1, size = 2
  #   )
  theme_light(base_size = 24)

p # show the plot

# No width/height is given, so ggsave() takes the size of the current device.
ggsave(filename = '../task-map.png', plot = p)
Saving 28 x 10 in image
Task Map Image Highlighting Specific Subsets (for Illustrative
Purposes)
# Hand-picked groups of task names (values must match the `task` column).

# Illustrative selection of tasks to display.
display_set <- c(
  'Writing story',
  'Advertisement writing',
  'Desert survival',
  'NASA Moon survival',
  'Ultimatum game (various versions)',
  'Dictator game and its variants',
  'Prisoner\'s Dilemma (various versions)',
  '9 Dot Problem',
  'Word construction from a subset of letters',
  'Typing game',
  'Ravens Matrices',
  'Euclidean traveling salesperson'
)

# Tasks that are most different from one another.
max_diff_set <- c(
  'Putting food into categories',
  '9 Dot Problem',
  'Shopping plan',
  'Mock jury',
  'Whac-A-Mole',
  'Checkers',
  'Reproducing arts',
  'Image rating',
  'TOPSIM - general mgmt business game',
  'Word construction from a subset of letters',
  'Minimal Group Paradigm (study diversity)'
)

# Tasks that are most similar to one another.
min_diff_set <- c(
  'Arithmetic problem 1',
  'Euclidean traveling salesperson',
  'Abstract grid task',
  'Mastermind',
  'Logic Problem',
  'Guessing the correlation',
  'Random dot motion',
  'Letters-to-numbers problems (cryptography)',
  'Computer maze',
  'Recall images',
  'Recall stories'
)

# Tasks that illustrate opportunities to add new tasks to the map.
display_limitations_set <- c(
  'Recall word lists',
  'Hidden figures in a picture (Recall Task)',
  'Recall images',
  'Recall stories',
  'Recall videos',
  'Writing story',
  'Advertisement writing'
)
# Task map with the tasks in `display_limitations_set` highlighted and
# labelled; all other tasks are drawn as plain points.
highlighted_tasks <- subset(combined_data, task %in% display_limitations_set)

p <- ggplot(combined_data, aes(x = PC1, y = PC2)) +
  # Constant size/colour are set outside aes(). Inside aes() they are treated
  # as data values and sent through scales, so "firebrick1" was drawn in the
  # default palette colour and size 4 was rescaled.
  # (Alternative: geom_point(aes(color = cluster), size = 4) to colour by
  # cluster.)
  geom_point(size = 4) +
  geom_point(data = highlighted_tasks, color = "firebrick1", size = 4) +
  geom_label(
    data = highlighted_tasks,
    aes(label = task),
    nudge_y = 0.1,
    size = 4
  ) +
  theme_minimal(base_size = 18) +
  theme(legend.position = "none")

p

ggsave(plot = p, filename = '../images/task-map_with_new_task_opportunities_highlighted.png')
Saving 28 x 10 in image
Create a cool 3D version
# Interactive 3-D scatter of the tasks on the first three principal
# components; point text is the task name and colour is the k-means cluster.
plot_ly(
  type = "scatter3d",
  mode = "markers", # mode = "text" is also possible
  x = combined_data[["PC1"]],
  y = combined_data[["PC2"]],
  z = combined_data[["PC3"]],
  color = combined_data[["cluster"]],
  text = combined_data[["task"]]
)
Create synthetic dependent variable based on the clusters
# Synthetic binary outcome: tasks in k-means cluster 2 or 3 get synergy = 1,
# all other tasks get 0. The same coding is applied to the `max_diff_set`
# subset (`tasks_with_dv`) and to the full table.
# NOTE(review): the cluster numbers come from kmeans(), so which groups are
# "2" and "3" presumably depends on the seeded run -- verify after changes.
tasks_with_dv <- subset(combined_data, task %in% max_diff_set)
tasks_with_dv <- mutate(
  tasks_with_dv,
  synergy = as.factor(ifelse(cluster == 2 | cluster == 3, 1, 0))
)

combined_data <- mutate(
  combined_data,
  synergy = as.factor(ifelse(cluster == 2 | cluster == 3, 1, 0))
)
Fitting and Visualizing Models for the Task Map.
# `x` holds every task (used for the decision plot and its accuracy);
# `train` holds only the `max_diff_set` tasks the model is fitted on.
# PC1 and PC2 come first because decisionplot() uses the first two
# non-class columns as its axes.
x <- combined_data %>% select(PC1, PC2, synergy, task)
train <- tasks_with_dv %>% select(PC1, PC2, synergy, task)
model <- svm(synergy ~ PC1 + PC2, data = train, kernel = "linear")

svmplot <- decisionplot(model, x, class_var = "synergy") +
  # Re-draw the training tasks in a distinct colour and label them.
  geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy),
             color = "darkolivegreen2", show.legend = FALSE) +
  geom_label(data = train, aes(label = task),
             nudge_y = 0.1, nudge_x = -0.1, size = 3) +
  labs(title = "SVM (Linear Kernel)") +
  theme_minimal(base_size = 12)

svmplot

# Pass the plot explicitly instead of relying on ggsave()'s last_plot().
ggsave('svmplot_synthetic_data.png', plot = svmplot)
Saving 7.29 x 4.51 in image

# 1-nearest-neighbour classifier fitted on the same `train` tasks and
# plotted over all tasks in `x` (both defined in the SVM step above).
model <- knn3(synergy ~ PC1 + PC2, data = train, k = 1)

knnplot <- decisionplot(model, x, class_var = "synergy") +
  # Re-draw the training tasks in a distinct colour and label them.
  geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy),
             color = "darkolivegreen2", show.legend = FALSE) +
  geom_label(data = train, aes(label = task),
             nudge_y = 0.1, nudge_x = -0.1, size = 3) +
  labs(title = "kNN (1 neighbor)") +
  theme_minimal(base_size = 12)

knnplot

# Pass the plot explicitly instead of relying on ggsave()'s last_plot().
ggsave('knnplot_synthetic_data.png', plot = knnplot)
Saving 7.29 x 4.51 in image

---
title: "R Notebook for Paper-Related Visualizations"
output: html_notebook
---

This notebook contains the key visualizations for the Task Mapping paper.

```{r}
library(factoextra)
library(NbClust)
library(cluster)
library(plotly)
library(ggplot2)
library(caret) #for knn
library(e1071) #for svm
library(dplyr)
library(tidyverse)
```

The decision-boundary plotting code below comes from: https://mhahsler.github.io/Introduction_to_Data_Mining_R_Examples/book/classification-alternative-techniques.html#k-nearest-neighbors
```{r decisionplot}

# Plot the decision regions of a fitted classifier over two predictors.
#
# Arguments:
#   model        Fitted classifier that has a predict() method.
#   data         Data frame with the class column and the predictors. The
#                first two columns that remain after dropping `class_var`
#                are used as the plot axes.
#   class_var    Name (a single string) of the factor column in `data` that
#                holds the true class.
#   predict_type `type` values handed to predict(): the first is used to get
#                class labels, the second to get class probabilities.
#   resolution   Number of grid points per axis (75 dpi if the image is
#                rendered 5 inches wide).
#
# Returns a ggplot: predicted class as a raster (alpha = margin between the
# two most probable classes, or 1 when no probabilities are available),
# class-boundary contours, the points in `data`, and the accuracy of the
# model on `data` in the subtitle.
decisionplot <- function(model, data, class_var, 
  predict_type = c("class", "prob"), resolution = 5 * 75) {
  stopifnot(
    is.character(class_var), length(class_var) == 1,
    class_var %in% colnames(data)
  )

  y <- data[[class_var]]
  x <- data %>% dplyr::select(-all_of(class_var))
  # The grid, the raster and the point overlay must all use the same two
  # columns; taking them from `data` instead of `x` would pick the class
  # column whenever it is one of the first two columns of `data`.
  axis_vars <- colnames(x)[1:2]

  # resubstitution accuracy (computed on `data` exactly as passed in)
  prediction <- predict(model, x, type = predict_type[1])
  # LDA returns a list
  if (is.list(prediction)) prediction <- prediction$class
  prediction <- factor(prediction, levels = levels(y))

  cm <- confusionMatrix(data = prediction, reference = y)
  acc <- cm$overall["Accuracy"]

  # evaluate model on a regular grid spanning the range of both axes
  r <- vapply(x[axis_vars], range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)
  g <- cbind(rep(xs, each = resolution), rep(ys, times = resolution))
  colnames(g) <- colnames(r)
  g <- as_tibble(g)

  ### guess how to get class labels from predict
  ### (unfortunately not very consistent between models)
  cl <- predict(model, g, type = predict_type[1])

  if (is.list(cl)) {
    # LDA returns a list
    prob <- cl$posterior
    cl <- cl$class
  } else {
    # Not every predict() method supports a probability type; treat a
    # failure as "no probabilities" instead of leaving `prob` undefined.
    prob <- tryCatch(
      predict(model, g, type = predict_type[2]),
      error = function(e) NULL
    )
  }

  # We visualize the difference in probability/score between the winning
  # class and the second best class. Anything that is not a matrix-like
  # object with at least two columns falls back to full opacity (1).
  max_prob <- tryCatch({
    stopifnot(length(dim(prob)) == 2, ncol(prob) >= 2)
    ranked <- t(apply(prob, MARGIN = 1, sort, decreasing = TRUE))
    ranked[, 1] - ranked[, 2]
  }, error = function(e) 1)

  cl <- factor(cl, levels = levels(y))

  g <- g %>% add_column(prediction = cl, probability = max_prob)

  # `size` is kept on geom_contour() for older ggplot2 releases; newer
  # releases prefer `linewidth` for line widths.
  ggplot(g, mapping = aes(
    x = .data[[axis_vars[1]]],
    y = .data[[axis_vars[2]]])) +
    geom_raster(mapping = aes(fill = prediction, alpha = probability)) +
    geom_contour(mapping = aes(z = as.numeric(prediction)),
      bins = length(levels(cl)), size = .5, color = "black") +
    geom_point(data = data, mapping = aes(
      x = .data[[axis_vars[1]]],
      y = .data[[axis_vars[2]]],
      shape = .data[[class_var]]), alpha = .7) +
    scale_alpha_continuous(range = c(0, 1), limits = c(0, 1), guide = "none") +
    # Explicit titles so the axes/legend show the column names rather than
    # the `.data[[...]]` expressions.
    labs(x = axis_vars[1], y = axis_vars[2], shape = class_var,
      subtitle = paste("Training accuracy:", round(acc, 2)))
}
```

# Load the Data
```{r}
# Task-by-feature table; the `task` column is dropped before the PCA below.
task_map <- readr::read_csv(file = '../task_map.csv')
```

# Plot the Task Map and other Related Images

Draw the task map using PCA & clustering

First, run the PCA
```{r, fig.width=16, fig.height=5}
set.seed(1)

# PCA on every column except the task name. Columns are centred; prcomp()'s
# `scale.` argument is left at its default.
pca <- task_map %>% # select(-continuous_questions) %>%
  select(-task) %>%
  prcomp(center = TRUE)

# get optimal number of clusters -- "silhouette" method
fviz_nbclust(x = pca$x, FUNcluster = stats::kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette method")

# get optimal number of clusters -- NbClust's vote across its indices
NbClust(data = pca$x, distance = "euclidean",
        min.nc = 2, max.nc = 15, method = "kmeans")

# NOTE(review): k = 3 is hard-coded, while the majority rule in the recorded
# NbClust output favours 2 clusters -- confirm this is intentional.
# The diagnostics above run between set.seed() and kmeans(); presumably they
# consume random numbers, so reordering them may change the cluster labels.
kmeans_output <- pca$x %>%
  kmeans(centers = 3, nstart = 100)

# Name the cluster column directly rather than renaming the deparsed
# `factor(kmeans_output$cluster)` header that cbind() would generate.
combined_data <- cbind(task_map, pca$x,
                       cluster = factor(kmeans_output$cluster))

fviz_eig(pca)
```

Standard Task Map Image with All Labels
```{r, fig.width=14, fig.height=5}
# Standard task map: each task at its (PC1, PC2) position, with a text label
# whose fill encodes the k-means cluster.
p <- ggplot(
  data = combined_data,
  mapping = aes(x = PC1, y = PC2, label = task, fill = cluster)
) +
  geom_point() +
  geom_label(size = 4, nudge_y = 0.1) +
  # Alternative kept for reference: label only a hand-picked subset, e.g.
  #   geom_label(
  #     data = subset(combined_data,
  #                   task %in% c("NASA Moon survival", "Desert survival")),
  #     aes(x = PC1, y = PC2, label = task, fill = cluster),
  #     nudge_y = 0.1, size = 2
  #   )
  theme_light(base_size = 24)

p # show the plot

# No width/height is given, so ggsave() takes the size of the current device.
ggsave(filename = '../task-map.png', plot = p)
```

Task Map Image Highlighting Specific Subsets (for Illustrative Purposes)
```{r, fig.width=14, fig.height=5}
# Hand-picked groups of task names (values must match the `task` column).

# Illustrative selection of tasks to display.
display_set <- c(
  'Writing story',
  'Advertisement writing',
  'Desert survival',
  'NASA Moon survival',
  'Ultimatum game (various versions)',
  'Dictator game and its variants',
  'Prisoner\'s Dilemma (various versions)',
  '9 Dot Problem',
  'Word construction from a subset of letters',
  'Typing game',
  'Ravens Matrices',
  'Euclidean traveling salesperson'
)

# Tasks that are most different from one another.
max_diff_set <- c(
  'Putting food into categories',
  '9 Dot Problem',
  'Shopping plan',
  'Mock jury',
  'Whac-A-Mole',
  'Checkers',
  'Reproducing arts',
  'Image rating',
  'TOPSIM - general mgmt business game',
  'Word construction from a subset of letters',
  'Minimal Group Paradigm (study diversity)'
)

# Tasks that are most similar to one another.
min_diff_set <- c(
  'Arithmetic problem 1',
  'Euclidean traveling salesperson',
  'Abstract grid task',
  'Mastermind',
  'Logic Problem',
  'Guessing the correlation',
  'Random dot motion',
  'Letters-to-numbers problems (cryptography)',
  'Computer maze',
  'Recall images',
  'Recall stories'
)

# Tasks that illustrate opportunities to add new tasks to the map.
display_limitations_set <- c(
  'Recall word lists',
  'Hidden figures in a picture (Recall Task)',
  'Recall images',
  'Recall stories',
  'Recall videos',
  'Writing story',
  'Advertisement writing'
)

# Task map with the tasks in `display_limitations_set` highlighted and
# labelled; all other tasks are drawn as plain points.
highlighted_tasks <- subset(combined_data, task %in% display_limitations_set)

p <- ggplot(combined_data, aes(x = PC1, y = PC2)) +
  # Constant size/colour are set outside aes(). Inside aes() they are treated
  # as data values and sent through scales, so "firebrick1" was drawn in the
  # default palette colour and size 4 was rescaled.
  # (Alternative: geom_point(aes(color = cluster), size = 4) to colour by
  # cluster.)
  geom_point(size = 4) +
  geom_point(data = highlighted_tasks, color = "firebrick1", size = 4) +
  geom_label(
    data = highlighted_tasks,
    aes(label = task),
    nudge_y = 0.1,
    size = 4
  ) +
  theme_minimal(base_size = 18) +
  theme(legend.position = "none")

p

ggsave(plot = p, filename = '../images/task-map_with_new_task_opportunities_highlighted.png')
```

Create a cool 3D version
```{r}
# Interactive 3-D scatter of the tasks on the first three principal
# components; point text is the task name and colour is the k-means cluster.
plot_ly(
  type = "scatter3d",
  mode = "markers", # mode = "text" is also possible
  x = combined_data[["PC1"]],
  y = combined_data[["PC2"]],
  z = combined_data[["PC3"]],
  color = combined_data[["cluster"]],
  text = combined_data[["task"]]
)
```

Create synthetic dependent variable based on the clusters
```{r}
# Synthetic binary outcome: tasks in k-means cluster 2 or 3 get synergy = 1,
# all other tasks get 0. The same coding is applied to the `max_diff_set`
# subset (`tasks_with_dv`) and to the full table.
# NOTE(review): the cluster numbers come from kmeans(), so which groups are
# "2" and "3" presumably depends on the seeded run -- verify after changes.
tasks_with_dv <- subset(combined_data, task %in% max_diff_set)
tasks_with_dv <- mutate(
  tasks_with_dv,
  synergy = as.factor(ifelse(cluster == 2 | cluster == 3, 1, 0))
)

combined_data <- mutate(
  combined_data,
  synergy = as.factor(ifelse(cluster == 2 | cluster == 3, 1, 0))
)
```

# Fitting and Visualizing Models for the Task Map.

```{r}
# `x` holds every task (used for the decision plot and its accuracy);
# `train` holds only the `max_diff_set` tasks the model is fitted on.
# PC1 and PC2 come first because decisionplot() uses the first two
# non-class columns as its axes.
x <- combined_data %>% select(PC1, PC2, synergy, task)
train <- tasks_with_dv %>% select(PC1, PC2, synergy, task)
model <- svm(synergy ~ PC1 + PC2, data = train, kernel = "linear")

svmplot <- decisionplot(model, x, class_var = "synergy") +
  # Re-draw the training tasks in a distinct colour and label them.
  geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy),
             color = "darkolivegreen2", show.legend = FALSE) +
  geom_label(data = train, aes(label = task),
             nudge_y = 0.1, nudge_x = -0.1, size = 3) +
  labs(title = "SVM (Linear Kernel)") +
  theme_minimal(base_size = 12)

svmplot

# Pass the plot explicitly instead of relying on ggsave()'s last_plot().
ggsave('svmplot_synthetic_data.png', plot = svmplot)
```

```{r}
# 1-nearest-neighbour classifier fitted on the same `train` tasks and
# plotted over all tasks in `x` (both defined in the SVM chunk above).
model <- knn3(synergy ~ PC1 + PC2, data = train, k = 1)

knnplot <- decisionplot(model, x, class_var = "synergy") +
  # Re-draw the training tasks in a distinct colour and label them.
  geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy),
             color = "darkolivegreen2", show.legend = FALSE) +
  geom_label(data = train, aes(label = task),
             nudge_y = 0.1, nudge_x = -0.1, size = 3) +
  labs(title = "kNN (1 neighbor)") +
  theme_minimal(base_size = 12)

knnplot

# Pass the plot explicitly instead of relying on ggsave()'s last_plot().
ggsave('knnplot_synthetic_data.png', plot = knnplot)
```